/* 
    Purpose: Using cleaned 1936 Consumption Survey data,
             this file creates income scores at the race-only,
             south-only, race x south, and occupation x south
             levels of variation. The income scores are then
             merged onto an occupation x race x south grid
             to create one output file.

    Note: Light cleaning of the data occurs prior to
          construction of the income scores.

    Creates: ConsumptionSurvey_1936_othervariations.dta
*/

* Start from an empty dataset in memory
clear 
* Turn off --more-- pauses so output does not stop the run
set more off

* Every relative path below (./RawData, ./output) resolves against this folder.
* NOTE(review): the global Mydirectory1 is not defined in this file --
* presumably set by a master do-file or profile; confirm before running alone.
cd "$Mydirectory1/1_DataSources/ConsumptionSurvey_1936/"    

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*************************************
*** IMPORT AND CLEAN DATA
*************************************

* Load only the variables used below from the pooled 1936 survey file
use husband_age tot_fam_inc south occ1_harm white           ///
    mem1_wagetot mem1_wagetot0 fam_wagetot fam_wagetot0     ///
    totkids_3                                               ///
    using "./RawData/pooled_data_w_occ_codes.dta", clear

* Age: eyeball the distribution (expected range is 30-50), then shorten the name
    tabulate husband_age
    rename husband_age age

* Income: inspect the raw distribution before deflating
    summarize tot_fam_inc, detail

/*
   Convert income to 1950 dollars using the CPI.
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-
   The base-period value 13.8 is the midpoint of the 1935 (13.7)
   and 1936 (13.9) index values.
*/
    gen cpi_1935_36 = 13.8
    gen cpi_1950    = 24.1

    foreach incvar of varlist tot_fam_inc {
        replace `incvar' = `incvar' * (cpi_1950 / cpi_1935_36)
    }
    drop cpi_1935_36 cpi_1950

* Region of residence: renamed so it can serve as a merge key
    tabulate south
    rename south south_merge

* Race: 1 = white, 2 = black (white==0); anything else stays missing
    tabulate white, missing
    gen race = cond(white==1, 1, cond(white==0, 2, .))
    tabulate race white, missing

    keep if inlist(race, 1, 2)   // keep black and white respondents
    drop white

* Occupation: harmonized code, renamed to the coarsened 1950-basis name
    tabulate occ1_harm
    rename occ1_harm occ1950ej

* Weight---note: the 1936 survey has no survey weights, so every row counts once
    gen weight = 1

/* Keep only coarsened occupation codes inside the range of interest.
   Missing codes compare as larger than any number in Stata, so they
   are removed here as well. */
    keep if occ1950ej <= 81

* Keep only observations with non-missing, non-zero income
    keep if tot_fam_inc != . & tot_fam_inc != 0

* Helper constants: "number" is summed into cell counts, "tag" is used
* when building the cell templates
    gen number = 1
    gen tag    = 1

    tempfile fulldata
    save `fulldata'
    
*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************
    * Templates 1-2: one row per value of a single variable observed in the data
    *   byrace  -> race
    *   bysouth -> south_merge
        foreach t in byrace bysouth {
            if "`t'"=="byrace"  local tkey "race"
            if "`t'"=="bysouth" local tkey "south_merge"

            preserve
                * collapse is used only to get one row per value; the sum is discarded
                collapse (rawsum) tag, by(`tkey')
                keep `tkey'

                tempfile template_`t'
                save `template_`t''
            restore
        }

    * Templates 3-4: observed values of one variable crossed with south = 0/1
    *   byocc_bys -> occupation x south
    *   byr_bys   -> race x south
        foreach t in byocc_bys byr_bys {
            if "`t'"=="byocc_bys" local tkey "occ1950ej"
            if "`t'"=="byr_bys"   local tkey "race"

            preserve
                collapse (rawsum) tag, by(`tkey')

                * duplicate every row; the 0/1 duplicate flag becomes south_merge
                expand 2, gen(south_merge)
                keep `tkey' south_merge

                tempfile template_`t'
                save `template_`t''
            restore
        }

    * Template 5: occ x race x south
     /*Note: This level of variation is needed later,
             when income scores computed at different
             levels are merged together.*/
        preserve
            collapse (rawsum) tag, by(occ1950ej)

            * first duplication: flag 0/1 shifted to the race codes 1/2
            expand 2, gen(race)
            replace race = race + 1

            * second duplication: flag 0/1 is south_merge
            expand 2, gen(south_merge)
            keep occ1950ej race south_merge

            tempfile template_byocc_byr_bys
            save `template_byocc_byr_bys'
        restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** COLLAPSE 
*******************
    /*Note: No weight is available for 1936.*/

    * Reload the cleaned microdata; each pass below collapses it to one cell level
    use `fulldata', clear

    foreach x in byrace bysouth byocc_bys byr_bys {

        * Map the short level name onto the variable(s) that define a cell
        if "`x'"=="byrace"    local cell "race"
        if "`x'"=="bysouth"   local cell "south_merge"
        if "`x'"=="byocc_bys" local cell "occ1950ej south_merge"
        if "`x'"=="byr_bys"   local cell "race south_merge"

        preserve

            * Cell size (sum of the constant 1) and mean family income per cell
            collapse (rawsum) number (mean) tot_fam_inc, by(`cell')

            * Income was deflated to 1950 dollars during cleaning, so the label
            * reports 1950$ (it previously read 2015$)
            rename tot_fam_inc avg_totfaminc_1936_`x'
            label var avg_totfaminc_1936_`x' "Coarse 1936 income score, avg, in `cell' cell (1950$)"

            tempfile incomescores
            save `incomescores'

            * Merge onto the full template so that cells with no observations
            * still appear, with a count of zero and a missing income score
            use `template_`x'', clear
            merge 1:1 `cell' using `incomescores', nogen

            replace number=0 if number==.
            label var number "Number of obs in cell"
            rename number number_1936obs_`x'

            * Tempfile locals persist after the loop and are used in the SAVE section
            tempfile incomescores_`x'_1936
            save `incomescores_`x'_1936'

        restore
    }

/*Note: Although at least one cell is missing at the 
        occupation x south level, there's no need for 
        imputations, as 1936 data is only used to 
        adjust 1940 predicted income for farmers 
        and self-employed persons. Neither of those 
        occupation x south cells is missing. */

*************
* SAVE
*************

    * Start from the full occupation x race x south grid and attach each set of
    * income scores in turn. assert(match) stops the run unless every grid row
    * finds a score row and every score row is used.
    use `template_byocc_byr_bys', clear

    * occupation x south scores
    merge m:1 occ1950ej south_merge using `incomescores_byocc_bys_1936', assert(match) nogenerate

    * race x south scores
    merge m:1 race south_merge using `incomescores_byr_bys_1936', assert(match) nogenerate

    * south-only scores
    merge m:1 south_merge using `incomescores_bysouth_1936', assert(match) nogenerate

    * race-only scores
    merge m:1 race using `incomescores_byrace_1936', assert(match) nogenerate

    * Order the rows, shrink storage types, and write the combined file
    sort occ1950ej race south_merge
    compress
    save "./output/ConsumptionSurvey_1936_othervariations.dta", replace

